In [145]:
import pandas as pd
import numpy as np
import plotly.express as px
pd.options.plotting.backend = 'plotly'
from lec_utils import * # Feel free to uncomment and use this. It'll make your plotly graphs look like ours in lecture!
Step 1: Introduction¶
In [146]:
# Lift pandas' display truncation so every column (and every row) of a
# displayed frame is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Raw match export; the first 12 rows shown below are one game's ten player
# rows followed by its two `team` summary rows.
data = pd.read_csv('2024_LoL_esports_match_data_from_OraclesElixir.csv')
data.head(12)
Out[146]:
| gameid | datacompleteness | url | league | year | split | playoffs | date | game | patch | participantid | side | position | playername | playerid | teamname | teamid | champion | ban1 | ban2 | ban3 | ban4 | ban5 | pick1 | pick2 | pick3 | pick4 | pick5 | gamelength | result | kills | deaths | assists | teamkills | teamdeaths | doublekills | triplekills | quadrakills | pentakills | firstblood | firstbloodkill | firstbloodassist | firstbloodvictim | team kpm | ckpm | firstdragon | dragons | opp_dragons | elementaldrakes | opp_elementaldrakes | infernals | mountains | clouds | oceans | chemtechs | hextechs | dragons (type unknown) | elders | opp_elders | firstherald | heralds | opp_heralds | void_grubs | opp_void_grubs | firstbaron | barons | opp_barons | firsttower | towers | opp_towers | firstmidtower | firsttothreetowers | turretplates | opp_turretplates | inhibitors | opp_inhibitors | damagetochampions | dpm | damageshare | damagetakenperminute | damagemitigatedperminute | wardsplaced | wpm | wardskilled | wcpm | controlwardsbought | visionscore | vspm | totalgold | earnedgold | earned gpm | earnedgoldshare | goldspent | gspd | gpr | total cs | minionkills | monsterkills | monsterkillsownjungle | monsterkillsenemyjungle | cspm | goldat10 | xpat10 | csat10 | opp_goldat10 | opp_xpat10 | opp_csat10 | golddiffat10 | xpdiffat10 | csdiffat10 | killsat10 | assistsat10 | deathsat10 | opp_killsat10 | opp_assistsat10 | opp_deathsat10 | goldat15 | xpat15 | csat15 | opp_goldat15 | opp_xpat15 | opp_csat15 | golddiffat15 | xpdiffat15 | csdiffat15 | killsat15 | assistsat15 | deathsat15 | opp_killsat15 | opp_assistsat15 | opp_deathsat15 | goldat20 | xpat20 | csat20 | opp_goldat20 | opp_xpat20 | opp_csat20 | golddiffat20 | xpdiffat20 | csdiffat20 | killsat20 | assistsat20 | deathsat20 | opp_killsat20 | opp_assistsat20 | opp_deathsat20 | goldat25 | xpat25 | csat25 | opp_goldat25 | opp_xpat25 | opp_csat25 | golddiffat25 | xpdiffat25 | csdiffat25 | killsat25 | assistsat25 | 
deathsat25 | opp_killsat25 | opp_assistsat25 | opp_deathsat25 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 1 | Blue | top | Zika | oe:player:65ed20b21e2993fb00dbd21a2fd991b | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | Aatrox | Akali | Nocturne | K'Sante | Lee Sin | Wukong | NaN | NaN | NaN | NaN | NaN | 1886 | 0 | 1 | 3 | 1 | 3 | 16 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.10 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 7092 | 225.62 | 0.17 | 564.15 | NaN | 14 | 0.45 | 4 | 0.13 | 5 | 24 | 0.76 | 11083 | 6960 | 221.42 | 0.24 | 10784 | NaN | NaN | 279.0 | 256.0 | 23 | 16.0 | 0.0 | 8.88 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 2 | Blue | jng | Weiwei | oe:player:57da8dfcfbdb4e5b019fe93003db1c4 | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | Maokai | Akali | Nocturne | K'Sante | Lee Sin | Wukong | NaN | NaN | NaN | NaN | NaN | 1886 | 0 | 0 | 4 | 3 | 3 | 16 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.10 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 7361 | 234.18 | 0.18 | 847.48 | NaN | 10 | 0.32 | 12 | 0.38 | 10 | 39 | 1.24 | 8636 | 4513 | 143.57 | 0.15 | 8840 | NaN | NaN | 153.0 | 14.0 | 139 | 111.0 | 3.0 | 4.87 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 3 | Blue | mid | Scout | oe:player:71e79ef80600d398d90cfebe3b0b758 | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | Orianna | Akali | Nocturne | K'Sante | Lee Sin | Wukong | NaN | NaN | NaN | NaN | NaN | 1886 | 0 | 0 | 2 | 0 | 3 | 16 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.10 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 10005 | 318.29 | 0.24 | 432.22 | NaN | 4 | 0.13 | 8 | 0.25 | 2 | 31 | 0.99 | 10743 | 6620 | 210.60 | 0.23 | 10594 | NaN | NaN | 270.0 | 269.0 | 1 | 1.0 | 0.0 | 8.59 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 4 | Blue | bot | GALA | oe:player:867e8957fae1cb59f0808dbcc3aada2 | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | Kalista | Akali | Nocturne | K'Sante | Lee Sin | Wukong | NaN | NaN | NaN | NaN | NaN | 1886 | 0 | 2 | 4 | 0 | 3 | 16 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.10 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 10892 | 346.51 | 0.26 | 491.45 | NaN | 22 | 0.70 | 13 | 0.41 | 4 | 44 | 1.40 | 12224 | 8101 | 257.72 | 0.28 | 11119 | NaN | NaN | 311.0 | 307.0 | 4 | 0.0 | 0.0 | 9.89 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 5 | Blue | sup | Mark | oe:player:a74c2977c1fc826e9e7bdb6b224a141 | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | Senna | Akali | Nocturne | K'Sante | Lee Sin | Wukong | NaN | NaN | NaN | NaN | NaN | 1886 | 0 | 0 | 3 | 3 | 3 | 16 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.10 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 6451 | 205.23 | 0.15 | 239.71 | NaN | 47 | 1.50 | 22 | 0.70 | 12 | 111 | 3.53 | 7221 | 3098 | 98.56 | 0.11 | 6175 | NaN | NaN | 30.0 | 30.0 | 0 | 0.0 | 0.0 | 0.95 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 5 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 6 | Red | top | Xiaoxu | oe:player:8007ba25dee37ac1e4445a6e9f8d252 | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | Rumble | Poppy | Ashe | Neeko | Vi | Jarvan IV | NaN | NaN | NaN | NaN | NaN | 1886 | 1 | 4 | 0 | 6 | 16 | 3 | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | 0.51 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14049 | 446.95 | 0.25 | 228.90 | NaN | 11 | 0.35 | 3 | 0.10 | 5 | 26 | 0.83 | 13378 | 9255 | 294.43 | 0.23 | 11179 | NaN | NaN | 283.0 | 245.0 | 38 | 15.0 | 6.0 | 9.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 6 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 7 | Red | jng | naiyou | oe:player:a7b51467f09577883d7150f37393964 | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | Rell | Poppy | Ashe | Neeko | Vi | Jarvan IV | NaN | NaN | NaN | NaN | NaN | 1886 | 1 | 1 | 0 | 12 | 16 | 3 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.51 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3945 | 125.50 | 0.07 | 591.03 | NaN | 14 | 0.45 | 8 | 0.25 | 11 | 49 | 1.56 | 10590 | 6467 | 205.74 | 0.16 | 9455 | NaN | NaN | 169.0 | 12.0 | 157 | 91.0 | 21.0 | 5.38 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 8 | Red | mid | VicLa | oe:player:d709285b163a94af9d819e568c592ba | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | LeBlanc | Poppy | Ashe | Neeko | Vi | Jarvan IV | NaN | NaN | NaN | NaN | NaN | 1886 | 1 | 4 | 0 | 7 | 16 | 3 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.51 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14917 | 474.56 | 0.26 | 444.11 | NaN | 10 | 0.32 | 14 | 0.45 | 8 | 44 | 1.40 | 14603 | 10480 | 333.40 | 0.25 | 12643 | NaN | NaN | 329.0 | 319.0 | 10 | 7.0 | 2.0 | 10.47 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 8 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 9 | Red | bot | Assum | oe:player:da3299cd46f1ad8a86bbadeb6b8a320 | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | Varus | Poppy | Ashe | Neeko | Vi | Jarvan IV | NaN | NaN | NaN | NaN | NaN | 1886 | 1 | 7 | 1 | 5 | 16 | 3 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.51 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 19516 | 620.87 | 0.34 | 268.57 | NaN | 11 | 0.35 | 14 | 0.45 | 7 | 41 | 1.30 | 15021 | 10898 | 346.70 | 0.27 | 13820 | NaN | NaN | 303.0 | 294.0 | 9 | 9.0 | 0.0 | 9.64 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 9 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 10 | Red | sup | Zorah | oe:player:937dc5479c6416d1ad4997538144f6e | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | Renata Glasc | Poppy | Ashe | Neeko | Vi | Jarvan IV | NaN | NaN | NaN | NaN | NaN | 1886 | 1 | 0 | 2 | 13 | 16 | 3 | NaN | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | 0.51 | 0.6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4516 | 143.67 | 0.08 | 384.43 | NaN | 76 | 2.42 | 10 | 0.32 | 25 | 117 | 3.72 | 8145 | 4022 | 127.95 | 0.10 | 7110 | NaN | NaN | 16.0 | 16.0 | 0 | 0.0 | 0.0 | 0.51 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 10 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 100 | Blue | team | NaN | NaN | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | NaN | Akali | Nocturne | K'Sante | Lee Sin | Wukong | Kalista | Senna | Orianna | Maokai | Aatrox | 1886 | 0 | 3 | 16 | 7 | 3 | 16 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 0.10 | 0.6 | NaN | 2.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 2.0 | NaN | 2.0 | 9.0 | NaN | NaN | NaN | NaN | 0.0 | 1.0 | 41801 | 1329.83 | NaN | 2574.97 | NaN | 97 | 3.09 | 59 | 1.88 | 33 | 250 | 7.95 | 49907 | 29292 | 931.88 | NaN | 47512 | -0.13 | NaN | NaN | NaN | 167 | 127.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 11 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 200 | Red | team | NaN | NaN | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | NaN | Poppy | Ashe | Neeko | Vi | Jarvan IV | Renata Glasc | Varus | LeBlanc | Rell | Rumble | 1886 | 1 | 16 | 3 | 43 | 16 | 3 | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | 0.51 | 0.6 | NaN | 3.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | NaN | 9.0 | 2.0 | NaN | NaN | NaN | NaN | 1.0 | 0.0 | 56942 | 1811.52 | NaN | 1917.04 | NaN | 122 | 3.88 | 49 | 1.56 | 56 | 277 | 8.81 | 61737 | 41122 | 1308.23 | NaN | 54207 | 0.13 | NaN | NaN | NaN | 213 | 121.0 | 29.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
In [147]:
# (rows, columns) of the raw export, before any filtering.
data.shape
Out[147]:
(117576, 161)
In [148]:
# Rows 10 and 11 are the two team-level summary rows (position == 'team')
# that follow the ten player rows of the first game.
data.iloc[10:12]
Out[148]:
| gameid | datacompleteness | url | league | year | split | playoffs | date | game | patch | participantid | side | position | playername | playerid | teamname | teamid | champion | ban1 | ban2 | ban3 | ban4 | ban5 | pick1 | pick2 | pick3 | pick4 | pick5 | gamelength | result | kills | deaths | assists | teamkills | teamdeaths | doublekills | triplekills | quadrakills | pentakills | firstblood | firstbloodkill | firstbloodassist | firstbloodvictim | team kpm | ckpm | firstdragon | dragons | opp_dragons | elementaldrakes | opp_elementaldrakes | infernals | mountains | clouds | oceans | chemtechs | hextechs | dragons (type unknown) | elders | opp_elders | firstherald | heralds | opp_heralds | void_grubs | opp_void_grubs | firstbaron | barons | opp_barons | firsttower | towers | opp_towers | firstmidtower | firsttothreetowers | turretplates | opp_turretplates | inhibitors | opp_inhibitors | damagetochampions | dpm | damageshare | damagetakenperminute | damagemitigatedperminute | wardsplaced | wpm | wardskilled | wcpm | controlwardsbought | visionscore | vspm | totalgold | earnedgold | earned gpm | earnedgoldshare | goldspent | gspd | gpr | total cs | minionkills | monsterkills | monsterkillsownjungle | monsterkillsenemyjungle | cspm | goldat10 | xpat10 | csat10 | opp_goldat10 | opp_xpat10 | opp_csat10 | golddiffat10 | xpdiffat10 | csdiffat10 | killsat10 | assistsat10 | deathsat10 | opp_killsat10 | opp_assistsat10 | opp_deathsat10 | goldat15 | xpat15 | csat15 | opp_goldat15 | opp_xpat15 | opp_csat15 | golddiffat15 | xpdiffat15 | csdiffat15 | killsat15 | assistsat15 | deathsat15 | opp_killsat15 | opp_assistsat15 | opp_deathsat15 | goldat20 | xpat20 | csat20 | opp_goldat20 | opp_xpat20 | opp_csat20 | golddiffat20 | xpdiffat20 | csdiffat20 | killsat20 | assistsat20 | deathsat20 | opp_killsat20 | opp_assistsat20 | opp_deathsat20 | goldat25 | xpat25 | csat25 | opp_goldat25 | opp_xpat25 | opp_csat25 | golddiffat25 | xpdiffat25 | csdiffat25 | killsat25 | assistsat25 | 
deathsat25 | opp_killsat25 | opp_assistsat25 | opp_deathsat25 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 100 | Blue | team | NaN | NaN | LNG Esports | oe:team:a9145b7711873f53e610fbba0493484 | NaN | Akali | Nocturne | K'Sante | Lee Sin | Wukong | Kalista | Senna | Orianna | Maokai | Aatrox | 1886 | 0 | 3 | 16 | 7 | 3 | 16 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 0.10 | 0.6 | NaN | 2.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 2.0 | NaN | 2.0 | 9.0 | NaN | NaN | NaN | NaN | 0.0 | 1.0 | 41801 | 1329.83 | NaN | 2574.97 | NaN | 97 | 3.09 | 59 | 1.88 | 33 | 250 | 7.95 | 49907 | 29292 | 931.88 | NaN | 47512 | -0.13 | NaN | NaN | NaN | 167 | 127.0 | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 11 | 10660-10660_game_1 | partial | https://lpl.qq.com/es/stats.shtml?bmid=10660 | DCup | 2023 | NaN | 0 | 2024-01-01 05:13:15 | 1 | 13.24 | 200 | Red | team | NaN | NaN | Rare Atom | oe:team:8516ca63facc91286d6c00212ca945e | NaN | Poppy | Ashe | Neeko | Vi | Jarvan IV | Renata Glasc | Varus | LeBlanc | Rell | Rumble | 1886 | 1 | 16 | 3 | 43 | 16 | 3 | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | 0.51 | 0.6 | NaN | 3.0 | 2.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | NaN | 9.0 | 2.0 | NaN | NaN | NaN | NaN | 1.0 | 0.0 | 56942 | 1811.52 | NaN | 1917.04 | NaN | 122 | 3.88 | 49 | 1.56 | 56 | 277 | 8.81 | 61737 | 41122 | 1308.23 | NaN | 54207 | 0.13 | NaN | NaN | NaN | 213 | 121.0 | 29.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Step 2: Data Cleaning and Exploratory Data Analysis¶
2.1 Extract team data and target columns¶
In [149]:
# Columns kept for the analysis: outcome, side, "first objective" flags,
# game length, and gold/XP differences at 10/15/20 minutes.
target_columns = [
    'result', 'side',
    'firstblood', 'firstdragon', 'firstbaron',
    'firsttower', 'firstmidtower', 'firsttothreetowers',
    'gamelength',
    'golddiffat10', 'golddiffat15', 'golddiffat20',
    'xpdiffat10', 'xpdiffat15', 'xpdiffat20',
]

# Keep only the team-level summary rows and renumber them from 0.
# NOTE: this rebinds `data`, so the cell cannot be re-run without reloading
# the CSV first (the `position` column no longer exists afterwards).
is_team_row = data['position'] == 'team'
data = data.loc[is_team_row, target_columns].reset_index(drop=True)
data.head()
Out[149]:
| result | side | firstblood | firstdragon | firstbaron | firsttower | firstmidtower | firsttothreetowers | gamelength | golddiffat10 | golddiffat15 | golddiffat20 | xpdiffat10 | xpdiffat15 | xpdiffat20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Blue | 0.0 | NaN | NaN | NaN | NaN | NaN | 1886 | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 1 | Red | 1.0 | NaN | NaN | NaN | NaN | NaN | 1886 | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 0 | Blue | 0.0 | NaN | NaN | NaN | NaN | NaN | 1911 | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 1 | Red | 1.0 | NaN | NaN | NaN | NaN | NaN | 1911 | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 1 | Blue | 1.0 | NaN | NaN | NaN | NaN | NaN | 1324 | NaN | NaN | NaN | NaN | NaN | NaN |
In [150]:
# (rows, columns) after keeping only team rows and the target columns.
data.shape
Out[150]:
(19596, 15)
2.2 Check and modify NaN¶
In [151]:
# Number of missing values in each column.
data.isna().sum()
Out[151]:
result 0 side 0 firstblood 0 firstdragon 2782 firstbaron 2782 firsttower 2782 firstmidtower 2784 firsttothreetowers 2782 gamelength 0 golddiffat10 2784 golddiffat15 2786 golddiffat20 2822 xpdiffat10 2784 xpdiffat15 2786 xpdiffat20 2822 dtype: int64
In [152]:
# Columns that must be present for a row to be kept: the tower flags plus the
# gold/XP differences at 10, 15 and 20 minutes.
need_drop = ['firsttower', 'firstmidtower', 'firsttothreetowers'] + [
    f'{stat}diffat{minute}'
    for stat in ('gold', 'xp')
    for minute in (10, 15, 20)
]

# Drop every row with a missing value in any of those columns, then re-check
# the per-column missing counts.
data = data.dropna(subset=need_drop)
data.isna().sum()
Out[152]:
result 0 side 0 firstblood 0 firstdragon 0 firstbaron 0 firsttower 0 firstmidtower 0 firsttothreetowers 0 gamelength 0 golddiffat10 0 golddiffat15 0 golddiffat20 0 xpdiffat10 0 xpdiffat15 0 xpdiffat20 0 dtype: int64
In [153]:
# (rows, columns) after dropping rows with missing values.
data.shape
Out[153]:
(16774, 15)
2.3 Categorize Gamelength¶
In [154]:
# Shortest game length in the cleaned data (the plots below label this column in seconds).
data['gamelength'].min()
Out[154]:
1143
In [155]:
# Longest game length in the cleaned data (the plots below label this column in seconds).
data['gamelength'].max()
Out[155]:
3482
In [156]:
# Distribution of game length with a box plot in the margin.
# NOTE(review): `data` holds one row per team, so each game appears to be
# counted twice here -- confirm the 'Number of Games' axis is intended.
fig = px.histogram(
    data,
    x='gamelength',
    nbins=150,
    title='Game Count by Game Duration (seconds)',
    marginal='box',
    color_discrete_sequence=['#AB63FA'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='Game Duration (seconds)',
    yaxis_title='Number of Games',
)
fig.show()
In [157]:
# Save the current `fig` (the game-length histogram from the cell above) as HTML.
fig.write_html("gamelength_hist.html")
In [158]:
# Duration bucket labels, ordered from shortest to longest game.
gametime = ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']


def group_time(time):
    """Map a game length in seconds to its duration bucket label.

    Buckets are half-open five-minute ranges: below 1500 s, [1500, 1800),
    [1800, 2100), [2100, 2400), and 2400 s or more. Half-open bounds are used
    so that fractional values (e.g. 1499.5) land in the adjacent bucket
    instead of falling through to the last one; whole-second inputs are
    classified exactly as before.

    Parameters
    ----------
    time : int or float
        Game length in seconds.

    Returns
    -------
    str
        One of the labels in `gametime`.
    """
    if time < 1500:
        return gametime[0]
    elif time < 1800:
        return gametime[1]
    elif time < 2100:
        return gametime[2]
    elif time < 2400:
        return gametime[3]
    else:
        return gametime[4]
# Replace the raw `gamelength` column with its duration bucket label.
time_labels = data['gamelength'].map(group_time)
data = data.drop(columns='gamelength').assign(time_label=time_labels)
In [159]:
# Rows per duration bucket, sorted by frequency (most common first).
data['time_label'].value_counts()
Out[159]:
time_label 30-35(mins) 5522 25-30(mins) 5348 35-40(mins) 2714 <=25(mins) 1786 >=40(mins) 1404 Name: count, dtype: int64
In [160]:
# Same counts, reordered to follow the bucket order defined in `gametime`.
data['time_label'].value_counts().reindex(gametime)
Out[160]:
time_label <=25(mins) 1786 25-30(mins) 5348 30-35(mins) 5522 35-40(mins) 2714 >=40(mins) 1404 Name: count, dtype: int64
In [161]:
# Rows per duration bucket, in the bucket order defined by `gametime`.
# The index and value columns are named explicitly because the column names
# produced by `value_counts().reset_index()` differ between pandas versions
# (the chart below needs `time_label` and `count`).
# NOTE(review): `data` holds one row per team, so each game appears to be
# counted twice here -- confirm the 'Number of Games' axis is intended.
counts = (
    data['time_label']
    .value_counts()
    .reindex(gametime)
    .rename_axis('time_label')
    .reset_index(name='count')
)
fig = px.bar(
    counts,
    x='time_label',
    y='count',
    title='Game Count by Game Duration (minutes)',
    color_discrete_sequence=['#AB63FA'],
    width=700,
    height=400
)
fig.update_layout(
    xaxis_title='Game Duration (minutes)',
    yaxis_title='Number of Games'
)
fig.show()
In [162]:
# Save the current `fig` (the duration-bucket bar chart from the cell above) as HTML.
fig.write_html("gameduration_hist.html")
2.4 Recategorize result as win¶
In [163]:
# Replace the numeric `result` column with a boolean `win` column: rows where
# `result` equals 1 become True, everything else becomes False. A vectorized
# comparison is used instead of a row-by-row `apply`.
data = data.assign(win=data['result'] == 1).drop(columns='result')
In [165]:
# `DataFrame.to_markdown` needs the optional `tabulate` package to be installed;
# the imported name `tabulate` is not referenced directly in this cell.
from tabulate import tabulate
# Print the first rows as a Markdown table (plain text, e.g. for pasting into a write-up).
print(data.head().to_markdown())
| | side | firstblood | firstdragon | firstbaron | firsttower | firstmidtower | firsttothreetowers | golddiffat10 | golddiffat15 | golddiffat20 | xpdiffat10 | xpdiffat15 | xpdiffat20 | time_label | win | |---:|:-------|-------------:|--------------:|-------------:|-------------:|----------------:|---------------------:|---------------:|---------------:|---------------:|-------------:|-------------:|-------------:|:-------------|:------| | 30 | Blue | 0 | 1 | 1 | 1 | 1 | 1 | 1364 | 2293 | 4248 | 557 | 949 | 2138 | <=25(mins) | True | | 31 | Red | 1 | 0 | 0 | 0 | 0 | 0 | -1364 | -2293 | -4248 | -557 | -949 | -2138 | <=25(mins) | False | | 32 | Blue | 0 | 0 | 0 | 0 | 0 | 0 | -88 | -75 | 777 | 625 | 1092 | 2722 | 35-40(mins) | True | | 33 | Red | 1 | 1 | 1 | 1 | 1 | 1 | 88 | 75 | -777 | -625 | -1092 | -2722 | 35-40(mins) | False | | 34 | Blue | 0 | 1 | 1 | 0 | 0 | 0 | -2583 | -561 | -1528 | -1718 | 410 | -722 | 30-35(mins) | True |
In [166]:
# (rows, columns) after swapping `gamelength`/`result` for `time_label`/`win`.
data.shape
Out[166]:
(16774, 15)
2.5 golddiffat10 and xpdiffat10 distributions¶
In [191]:
# Red-side rows only.
df = data.loc[data['side'] == 'Red']

# Histogram of the 10-minute gold difference with a box plot in the margin.
fig = px.histogram(
    df,
    x='golddiffat10',
    nbins=150,
    title='Team Count by Gold Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#FF4040'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='Gold Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed vertical lines at the 2.5th and 97.5th percentiles bracket the
# central 95% of the distribution.
lower, upper = df['golddiffat10'].quantile([0.025, 0.975])
for cutoff, pct_label, corner in (
    (lower, '2.5%', 'top left'),
    (upper, '97.5%', 'top right'),
):
    fig.add_vline(
        x=cutoff,
        line_dash='dash',
        line_color='red',
        line_width=2,
        annotation_text=f'{pct_label} ({cutoff:.0f})',
        annotation_position=corner,
        annotation_font_color='black',
        annotation_bgcolor='white',
    )
fig.show()
In [194]:
# Blue-side rows only.
df = data.loc[data['side'] == 'Blue']

# Histogram of the 10-minute gold difference with a box plot in the margin.
fig = px.histogram(
    df,
    x='golddiffat10',
    nbins=150,
    title='Team Count by Gold Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#1E90FF'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='Gold Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed vertical lines at the 2.5th and 97.5th percentiles bracket the
# central 95% of the distribution.
lower, upper = df['golddiffat10'].quantile([0.025, 0.975])
for cutoff, pct_label, corner in (
    (lower, '2.5%', 'top left'),
    (upper, '97.5%', 'top right'),
):
    fig.add_vline(
        x=cutoff,
        line_dash='dash',
        line_color='#1E90FF',
        line_width=2,
        annotation_text=f'{pct_label} ({cutoff:.0f})',
        annotation_position=corner,
        annotation_font_color='black',
        annotation_bgcolor='white',
    )
fig.show()
In [206]:
# Red-side rows only.
df = data.loc[data['side'] == 'Red']

# Histogram of the 10-minute XP difference with a box plot in the margin.
fig = px.histogram(
    df,
    x='xpdiffat10',
    nbins=150,
    title='Team Count by XP Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#CE2029'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='XP Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed vertical lines at the 2.5th and 97.5th percentiles bracket the
# central 95% of the distribution.
lower, upper = df['xpdiffat10'].quantile([0.025, 0.975])
for cutoff, pct_label, corner in (
    (lower, '2.5%', 'top left'),
    (upper, '97.5%', 'top right'),
):
    fig.add_vline(
        x=cutoff,
        line_dash='dash',
        line_color='#CE2029',
        line_width=2,
        annotation_text=f'{pct_label} ({cutoff:.0f})',
        annotation_position=corner,
        annotation_font_color='black',
        annotation_bgcolor='white',
    )
fig.show()
In [207]:
# Save the current `fig` (the Red-side XP-difference histogram from the cell above) as HTML.
fig.write_html("xp10_red.html")
In [208]:
# Blue-side rows only.
df = data.loc[data['side'] == 'Blue']

# Histogram of the 10-minute XP difference with a box plot in the margin.
fig = px.histogram(
    df,
    x='xpdiffat10',
    nbins=150,
    title='Team Count by XP Difference at 10 minutes',
    marginal='box',
    color_discrete_sequence=['#4682B4'],
    width=700,
    height=400,
).update_layout(
    xaxis_title='XP Difference at 10 minutes',
    yaxis_title='Number of Teams',
)

# Dashed vertical lines at the 2.5th and 97.5th percentiles bracket the
# central 95% of the distribution.
lower, upper = df['xpdiffat10'].quantile([0.025, 0.975])
for cutoff, pct_label, corner in (
    (lower, '2.5%', 'top left'),
    (upper, '97.5%', 'top right'),
):
    fig.add_vline(
        x=cutoff,
        line_dash='dash',
        line_color='#4682B4',
        line_width=2,
        annotation_text=f'{pct_label} ({cutoff:.0f})',
        annotation_position=corner,
        annotation_font_color='black',
        annotation_bgcolor='white',
    )
fig.show()
In [209]:
# Save the current `fig` (the Blue-side XP-difference histogram from the cell above) as HTML.
fig.write_html("xp10_blue.html")
2.6 First* Info Explore¶
In [210]:
# The six "first objective" indicator columns.
# NOTE: this rebinds `target_columns`, which previously listed every column
# kept for the analysis.
target_columns = [
    'firstblood',
    'firstdragon',
    'firstbaron',
    'firsttower',
    'firstmidtower',
    'firsttothreetowers',
]

# Convert the indicators to booleans: True where the value equals 1.
data[target_columns] = data[target_columns].eq(1)
In [226]:
# Mean of the boolean `win` column per (side, firstblood) group, i.e. the win rate.
blood_win_rate = data.groupby(['side', 'firstblood'], as_index=False)['win'].mean()

# Grouped bars: one pair per firstblood outcome, coloured by side.
fig = blood_win_rate.plot(
    kind='bar',
    x='firstblood',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win rate for each side and firstblood',
    width=800,
)
fig
In [227]:
# Save the current `fig` (the first-blood win-rate chart from the cell above) as HTML.
fig.write_html("win_rate_blood.html")
In [228]:
# Mean of the boolean `win` column per (side, firstdragon) group, i.e. the win rate.
dragon_win_rate = data.groupby(['side', 'firstdragon'], as_index=False)['win'].mean()

# Grouped bars: one pair per firstdragon outcome, coloured by side.
fig = dragon_win_rate.plot(
    kind='bar',
    x='firstdragon',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win rate for each side and firstdragon',
    width=800,
)
fig
In [229]:
# Save the current `fig` (the first-dragon win-rate chart from the cell above) as HTML.
fig.write_html("win_rate_dragon.html")
In [213]:
# Mean of the boolean `win` column per (side, firstbaron) group, i.e. the win rate.
baron_win_rate = data.groupby(['side', 'firstbaron'], as_index=False)['win'].mean()

# Grouped bars: one pair per firstbaron outcome, coloured by side.
baron_win_rate.plot(
    kind='bar',
    x='firstbaron',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win rate for each side and firstbaron',
    width=800,
)
In [214]:
# Mean of the boolean `win` column per (side, firsttower) group, i.e. the win rate.
tower_win_rate = data.groupby(['side', 'firsttower'], as_index=False)['win'].mean()

# Grouped bars: one pair per firsttower outcome, coloured by side.
tower_win_rate.plot(
    kind='bar',
    x='firsttower',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win rate for each side and firsttower',
    width=800,
)
In [215]:
# Mean of the boolean `win` column per (side, firstmidtower) group, i.e. the win rate.
midtower_win_rate = data.groupby(['side', 'firstmidtower'], as_index=False)['win'].mean()

# Grouped bars: one pair per firstmidtower outcome, coloured by side.
midtower_win_rate.plot(
    kind='bar',
    x='firstmidtower',
    y='win',
    color='side',
    barmode='group',
    color_discrete_map={'Blue': 'steelblue', 'Red': 'crimson'},
    title='Win rate for each side and firstmidtower',
    width=800,
)
In [216]:
# Mean of `win` for every (side, firsttothreetowers) pair, drawn as grouped bars coloured by side.
# The figure is the cell's last expression, so it is displayed but not stored.
(
    data
    .groupby(['side', 'firsttothreetowers'])['win']
    .mean()
    .reset_index()
    .plot(
        kind='bar',
        x='firsttothreetowers', y='win',
        color='side', barmode='group',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Win rate for each side and firsttothreetowers',
        width=800,
    )
)
In [217]:
# One tidy frame per "first objective" flag, each holding the mean of `win`
# for every (side, flag) pair. The order of the flags fixes which frame lands
# in which of df1..df6.
df1, df2, df3, df4, df5, df6 = (
    data.groupby(['side', flag])['win'].mean().reset_index()
    for flag in (
        'firsttothreetowers',  # df1
        'firstmidtower',       # df2
        'firsttower',          # df3
        'firstdragon',         # df4
        'firstbaron',          # df5
        'firstblood',          # df6
    )
)
In [218]:
# Bring the six per-flag tables to a common schema: the flag column is renamed
# to 'First Info Result' and a constant 'First Info Detail' label is appended.
df1 = (
    df1
    .rename(columns={'firsttothreetowers': 'First Info Result'})
    .assign(**{'First Info Detail': 'First to Three Towers'})
)
df2 = (
    df2
    .rename(columns={'firstmidtower': 'First Info Result'})
    .assign(**{'First Info Detail': 'First Mid Tower'})
)
df3 = (
    df3
    .rename(columns={'firsttower': 'First Info Result'})
    .assign(**{'First Info Detail': 'First Tower'})
)
df4 = (
    df4
    .rename(columns={'firstdragon': 'First Info Result'})
    .assign(**{'First Info Detail': 'First Dragon'})
)
df5 = (
    df5
    .rename(columns={'firstbaron': 'First Info Result'})
    .assign(**{'First Info Detail': 'First Baron'})
)
df6 = (
    df6
    .rename(columns={'firstblood': 'First Info Result'})
    .assign(**{'First Info Detail': 'First Blood'})
)
In [219]:
# Stack the six per-flag tables and add a combined "<side> - <flag value>" label,
# which serves as the colour key of the combined chart.
df_all = (
    pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
    .assign(
        Side_First_Info=lambda stacked: (
            stacked['side'] + ' - ' + stacked['First Info Result'].astype(str)
        )
    )
)
df_all
Out[219]:
| side | First Info Result | win | First Info Detail | Side_First_Info | |
|---|---|---|---|---|---|
| 0 | Blue | False | 0.21 | First to Three Towers | Blue - False |
| 1 | Blue | True | 0.76 | First to Three Towers | Blue - True |
| 2 | Red | False | 0.24 | First to Three Towers | Red - False |
| 3 | Red | True | 0.79 | First to Three Towers | Red - True |
| 4 | Blue | False | 0.26 | First Mid Tower | Blue - False |
| 5 | Blue | True | 0.72 | First Mid Tower | Blue - True |
| 6 | Red | False | 0.28 | First Mid Tower | Red - False |
| 7 | Red | True | 0.74 | First Mid Tower | Red - True |
| 8 | Blue | False | 0.33 | First Tower | Blue - False |
| 9 | Blue | True | 0.69 | First Tower | Blue - True |
| 10 | Red | False | 0.31 | First Tower | Red - False |
| 11 | Red | True | 0.67 | First Tower | Red - True |
| 12 | Blue | False | 0.47 | First Dragon | Blue - False |
| 13 | Blue | True | 0.62 | First Dragon | Blue - True |
| 14 | Red | False | 0.38 | First Dragon | Red - False |
| 15 | Red | True | 0.53 | First Dragon | Red - True |
| 16 | Blue | False | 0.20 | First Baron | Blue - False |
| 17 | Blue | True | 0.85 | First Baron | Blue - True |
| 18 | Red | False | 0.17 | First Baron | Red - False |
| 19 | Red | True | 0.84 | First Baron | Red - True |
| 20 | Blue | False | 0.43 | First Blood | Blue - False |
| 21 | Blue | True | 0.62 | First Blood | Blue - True |
| 22 | Red | False | 0.38 | First Blood | Red - False |
| 23 | Red | True | 0.57 | First Blood | Red - True |
In [220]:
# Colour per "<side> - <flag value>" label: a lighter shade when the flag is False,
# a darker shade of the same hue when it is True.
color_map = dict(zip(
    ['Blue - False', 'Blue - True', 'Red - False', 'Red - True'],
    ['#4B8BBE',      '#306998',     '#FF7F7F',     '#D62728'],
))
In [235]:
# Grouped bars of win rate per first-objective type, one facet per side,
# coloured by the combined "<side> - <flag value>" label.
# NOTE(review): the title says "Tower" although the x-axis also covers first
# blood, dragon and baron — confirm whether the wording is intended.
fig = df_all.plot(
    kind='bar',
    x='First Info Detail',
    y='win',
    color='Side_First_Info',
    facet_col='side',
    barmode='group',
    color_discrete_map=color_map,
    category_orders={
        'First Info Detail': [
            'First Blood', 'First Dragon', 'First Tower',
            'First Mid Tower', 'First to Three Towers', 'First Baron',
        ]
    },
    title='Win Rate by Side and Tower',
).update_layout(width=1000, height=400)
fig
In [236]:
# Save the current `fig` (the faceted win-rate chart from the cell above) as an HTML file.
fig.write_html("win_rate_side_tower.html")
2.7 Explore side¶
In [259]:
# Per-side averages of the first-objective flags, the gold/XP leads at
# 10/15/20 minutes, and `win`.
target_columns = (
    ['firstblood', 'firstdragon', 'firstbaron',
     'firsttower', 'firstmidtower', 'firsttothreetowers']
    + ['golddiffat10', 'golddiffat15', 'golddiffat20']
    + ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
    + ['win']
)
df = data.groupby('side')[target_columns].mean()
df
Out[259]:
| firstblood | firstdragon | firstbaron | firsttower | firstmidtower | firsttothreetowers | golddiffat10 | golddiffat15 | golddiffat20 | xpdiffat10 | xpdiffat15 | xpdiffat20 | win | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| side | |||||||||||||
| Blue | 0.52 | 0.38 | 0.50 | 0.55 | 0.57 | 0.57 | 144.92 | 331.16 | 523.68 | 66.9 | 94.46 | 95.87 | 0.53 |
| Red | 0.48 | 0.61 | 0.46 | 0.45 | 0.43 | 0.43 | -144.92 | -331.16 | -523.68 | -66.9 | -94.46 | -95.87 | 0.47 |
In [260]:
# The name `tabulate` is not referenced below; pandas' `to_markdown` depends on
# the `tabulate` package, so this import mainly confirms it is installed.
from tabulate import tabulate
# Print the per-side summary table `df` as Markdown text.
print(df.to_markdown())
| side | firstblood | firstdragon | firstbaron | firsttower | firstmidtower | firsttothreetowers | golddiffat10 | golddiffat15 | golddiffat20 | xpdiffat10 | xpdiffat15 | xpdiffat20 | win | |:-------|-------------:|--------------:|-------------:|-------------:|----------------:|---------------------:|---------------:|---------------:|---------------:|-------------:|-------------:|-------------:|---------:| | Blue | 0.516275 | 0.384643 | 0.501967 | 0.548706 | 0.572314 | 0.571837 | 144.923 | 331.158 | 523.683 | 66.8972 | 94.4559 | 95.871 | 0.527483 | | Red | 0.483725 | 0.61488 | 0.456421 | 0.451294 | 0.427686 | 0.428163 | -144.923 | -331.158 | -523.683 | -66.8972 | -94.4559 | -95.871 | 0.472517 |
In [261]:
# Win rate per side (rows) and game-length bucket (columns); the columns are
# put in the order given by `gametime`, which is defined elsewhere in the notebook.
df = (
    data
    .pivot_table(values='win', index='side', columns='time_label', aggfunc='mean')
    .reindex(columns=gametime)
)
df
Out[261]:
| time_label | <=25(mins) | 25-30(mins) | 30-35(mins) | 35-40(mins) | >=40(mins) |
|---|---|---|---|---|---|
| side | |||||
| Blue | 0.6 | 0.52 | 0.52 | 0.51 | 0.53 |
| Red | 0.4 | 0.48 | 0.48 | 0.49 | 0.47 |
In [262]:
# The name `tabulate` is not referenced below; pandas' `to_markdown` depends on
# the `tabulate` package, so this import mainly confirms it is installed.
from tabulate import tabulate
# Print the side-by-game-length win-rate table `df` as Markdown text.
print(df.to_markdown())
| side | <=25(mins) | 25-30(mins) | 30-35(mins) | 35-40(mins) | >=40(mins) | |:-------|-------------:|--------------:|--------------:|--------------:|-------------:| | Blue | 0.601344 | 0.522438 | 0.516117 | 0.511422 | 0.52849 | | Red | 0.398656 | 0.477562 | 0.483883 | 0.488578 | 0.47151 |
2.8 Explore (10/15/20) Info¶
In [35]:
# 50-bin histogram of `golddiffat10`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='golddiffat10',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of Gold Difference at 10 mins',
    )
)
In [36]:
# 50-bin histogram of `golddiffat15`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='golddiffat15',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of Gold Difference at 15 mins',
    )
)
In [37]:
# 50-bin histogram of `golddiffat20`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='golddiffat20',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of Gold Difference at 20 mins',
    )
)
In [38]:
# 50-bin histogram of `golddiffat10`, split by match outcome, with a box-plot margin.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
(
    data
    .plot(
        kind='hist',
        x='golddiffat10',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of Gold Difference at 10 mins',
    )
)
In [39]:
# 50-bin histogram of `golddiffat15`, split by match outcome, with a box-plot margin.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
(
    data
    .plot(
        kind='hist',
        x='golddiffat15',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of Gold Difference at 15 mins',
    )
)
In [253]:
# 50-bin histogram of `golddiffat20`, split by match outcome, with a box-plot
# margin, resized to 800x400 for export.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
fig = (
    data
    .plot(
        kind='hist',
        x='golddiffat20',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of Gold Difference at 20 mins',
    )
)
fig.update_layout(
    width=800,
    height=400,
)
fig
In [254]:
# Save the current `fig` (the 20-minute gold-difference histogram from the cell above) as an HTML file.
fig.write_html("gold_20_hist.html")
In [41]:
# 50-bin histogram of `xpdiffat10`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='xpdiffat10',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of XP Difference at 10 mins',
    )
)
In [42]:
# 50-bin histogram of `xpdiffat15`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='xpdiffat15',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of XP Difference at 15 mins',
    )
)
In [43]:
# 50-bin histogram of `xpdiffat20`, one colour per side, with a box-plot margin.
(
    data.plot(
        kind='hist',
        x='xpdiffat20',
        color='side',
        nbins=50,
        marginal='box',
        color_discrete_map=dict(Blue='steelblue', Red='crimson'),
        title='Distribution of XP Difference at 20 mins',
    )
)
In [44]:
# 50-bin histogram of `xpdiffat10`, split by match outcome, with a box-plot margin.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
(
    data
    .plot(
        kind='hist',
        x='xpdiffat10',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of XP Difference at 10 mins',
    )
)
In [45]:
# 50-bin histogram of `xpdiffat15`, split by match outcome, with a box-plot margin.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
(
    data
    .plot(
        kind='hist',
        x='xpdiffat15',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of XP Difference at 15 mins',
    )
)
In [257]:
# 50-bin histogram of `xpdiffat20`, split by match outcome, with a box-plot
# margin, resized to 800x400 for export.
#
# `win` is averaged with .mean() elsewhere in this notebook, so it holds
# booleans (or 0/1), not strings. Plotly looks colours up by the actual column
# values, so the map has to be keyed by True/False; the previous string keys
# 'True'/'False' never matched and the default colour sequence was used instead.
fig = (
    data
    .plot(
        kind='hist',
        x='xpdiffat20',
        color='win',
        nbins=50,
        color_discrete_map={True: 'olive', False: 'crimson'},
        marginal='box',
        title='Distribution of XP Difference at 20 mins',
    )
)
fig.update_layout(
    width=800,
    height=400,
)
fig
In [258]:
# Save the current `fig` (the 20-minute XP-difference histogram from the cell above) as an HTML file.
fig.write_html("xp_20_hist.html")
In [243]:
# Vertical violins (with inner box plots) of `xpdiffat10`, one per game-length
# bucket, shown in chronological bucket order.
fig = data.plot(
    kind='violin',
    y='xpdiffat10',
    color='time_label',
    box=True,
    orientation='v',
    category_orders={
        'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
    },
    title='Distribution of XP Difference at 10 mins',
)
fig
In [244]:
# Save the current `fig` (the violin plot from the cell above) as an HTML file.
# NOTE(review): that figure plots only `xpdiffat10`, while the file name also
# mentions gold — confirm the intended file name.
fig.write_html("xp_gold_10_violin.html")
In [245]:
# Scatter of the 10-minute XP lead against the 10-minute gold lead, one point per row of `data`.
fig = px.scatter(
    data,
    x='golddiffat10',
    y='xpdiffat10',
    color='time_label',  # Optional: color by game duration group
    labels=dict(
        golddiffat10='Gold Difference at 10 Minutes',
        xpdiffat10='XP Difference at 10 Minutes',
        time_label='Game Duration Group',
    ),
    category_orders={
        'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
    },
    title='XP Difference vs. Gold Difference at 10 Minutes',
    width=800,
    height=500,
)
fig.show()
In [246]:
# Save the current `fig` (the XP-vs-gold scatter from the cell above) as an HTML file.
fig.write_html("xp_gold_10_scatter.html")
In [48]:
# Vertical violins (with inner box plots) of `xpdiffat15`, one per game-length bucket.
(
    data.plot(
        kind='violin',
        y='xpdiffat15',
        color='time_label',
        box=True,
        orientation='v',
        category_orders={
            'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
        },
        title='Distribution of XP Difference at 15 mins',
    )
)
In [49]:
# Vertical violins (with inner box plots) of `xpdiffat20`, one per game-length bucket.
(
    data.plot(
        kind='violin',
        y='xpdiffat20',
        color='time_label',
        box=True,
        orientation='v',
        category_orders={
            'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
        },
        title='Distribution of XP Difference at 20 mins',
    )
)
In [50]:
# Vertical violins (with inner box plots) of `golddiffat10`, one per game-length bucket.
(
    data.plot(
        kind='violin',
        y='golddiffat10',
        color='time_label',
        box=True,
        orientation='v',
        category_orders={
            'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
        },
        title='Distribution of Gold Difference at 10 mins',
    )
)
In [51]:
# Vertical violins (with inner box plots) of `golddiffat15`, one per game-length bucket.
(
    data.plot(
        kind='violin',
        y='golddiffat15',
        color='time_label',
        box=True,
        orientation='v',
        category_orders={
            'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
        },
        title='Distribution of Gold Difference at 15 mins',
    )
)
In [52]:
# Vertical violins (with inner box plots) of `golddiffat20`, one per game-length bucket.
(
    data.plot(
        kind='violin',
        y='golddiffat20',
        color='time_label',
        box=True,
        orientation='v',
        category_orders={
            'time_label': ['<=25(mins)', '25-30(mins)', '30-35(mins)', '35-40(mins)', '>=40(mins)']
        },
        title='Distribution of Gold Difference at 20 mins',
    )
)
Step 3: Framing a Prediction Problem¶
In [53]:
# TODO
# Make prediction of winner
Step 4: Baseline Model¶
In [455]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import make_column_transformer
# Baseline design matrix (three columns) and target, split 70/30 with a fixed seed.
# NOTE(review): the per-side summary table in this notebook shows exactly
# mirrored Blue/Red gold and XP differences, which suggests each game
# contributes two rows. A row-level random split can then put the two rows of
# one game on opposite sides of the split — consider a split grouped by
# `gameid`; confirm against the data.
X, y = data[['side', 'firstbaron', 'xpdiffat10']], data['win']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
)
def baseline_model(X_train, y_train):
    """Fit the baseline classifier and return the fitted pipeline.

    `side` and `firstbaron` are one-hot encoded (first level dropped, unknown
    levels ignored), `xpdiffat10` is standardised, and the result is fed to a
    default `LogisticRegression`.

    Parameters
    ----------
    X_train : DataFrame containing the columns 'side', 'firstbaron' and 'xpdiffat10'.
    y_train : target values aligned with `X_train`.

    Returns
    -------
    The fitted scikit-learn `Pipeline`.
    """
    categorical = ['side', 'firstbaron']
    numeric = ['xpdiffat10']
    pipeline = make_pipeline(
        make_column_transformer(
            (OneHotEncoder(drop='first', handle_unknown='ignore'), categorical),
            (StandardScaler(), numeric),
        ),
        LogisticRegression(),
    )
    # Pipeline.fit returns the pipeline itself.
    return pipeline.fit(X_train, y_train)


base = baseline_model(X_train, y_train)
base
Out[455]:
Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first',
handle_unknown='ignore'),
['side', 'firstbaron']),
('standardscaler',
StandardScaler(),
['xpdiffat10'])])),
('logisticregression', LogisticRegression())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first',
handle_unknown='ignore'),
['side', 'firstbaron']),
('standardscaler',
StandardScaler(),
['xpdiffat10'])])),
('logisticregression', LogisticRegression())])ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first',
handle_unknown='ignore'),
['side', 'firstbaron']),
('standardscaler', StandardScaler(),
['xpdiffat10'])])['side', 'firstbaron']
OneHotEncoder(drop='first', handle_unknown='ignore')
['xpdiffat10']
StandardScaler()
LogisticRegression()
In [457]:
import pandas as pd
import matplotlib.pyplot as plt

# Fitted steps of the baseline pipeline.
preprocessor = base.named_steps['columntransformer']
logit_model = base.named_steps['logisticregression']

# Feature labels in the same order as the transformer list: the one-hot
# columns first, then the single scaled numeric column.
encoded_cat_names = preprocessor.named_transformers_['onehotencoder'].get_feature_names_out(
    ['side', 'firstbaron']
)
feature_names = [*encoded_cat_names, 'xpdiffat10']

# One row per feature, ordered by absolute coefficient (largest first).
coefs = logit_model.coef_[0]  # shape: (n_features,)
coef_df = (
    pd.DataFrame({'Feature': feature_names, 'Coefficient': coefs})
    .assign(AbsCoeff=lambda frame: frame['Coefficient'].abs())
    .sort_values(by='AbsCoeff', ascending=False)
)

# Horizontal bars of the signed coefficients; the y-axis is inverted so the
# first row of `coef_df` ends up at the top.
plt.figure(figsize=(8, 5))
plt.barh(coef_df['Feature'], coef_df['Coefficient'], color='skyblue')
plt.axvline(0, color='black', linestyle='--')
plt.gca().invert_yaxis()
plt.xlabel('Coefficient')
plt.title('Logistic Regression Feature Importance')
plt.tight_layout()
plt.show()
In [458]:
import matplotlib.pyplot as plt
from sklearn.metrics import (
    roc_curve, auc, accuracy_score, classification_report,
    confusion_matrix, ConfusionMatrixDisplay,
)

# Test-set predictions of the baseline model and their accuracy.
y_pred = base.predict(X_test)
acc = accuracy_score(y_test, y_pred)

# Confusion matrix with rows/columns in the order of the model's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=base.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=base.classes_)
disp.plot()

plt.title('Confusion Matrix')
plt.grid(False)
# Accuracy caption placed below the axes (axes-relative coordinates).
caption_style = dict(fontsize=12, ha='center', va='center', transform=plt.gca().transAxes)
plt.text(0.5, -0.15, f'Accuracy: {acc:.4f}', **caption_style)
plt.tight_layout()
plt.savefig("base_cm.png", dpi=150, bbox_inches='tight')
plt.show()
In [459]:
# Scores from the second column of predict_proba, then the ROC curve and its area.
y_prob = base.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(
    fpr, tpr,
    color='darkorange', lw=2,
    label=f'ROC Curve (AUC = {roc_auc:.2f})',
)
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
plt.title('ROC Curve for Logistic Regression')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()
plt.savefig("base_roc.png", dpi=300, bbox_inches='tight')
plt.show()
Step 5: Final Model¶
In [554]:
import numpy as np
from sklearn.preprocessing import FunctionTransformer
def compute_per_min(X):
    """Average per-minute rate over the first three columns of `X`.

    Columns 0, 1 and 2 are divided by 10, 15 and 20 respectively, and the
    three quotients are averaged.

    Parameters
    ----------
    X : DataFrame with at least three numeric columns (selected by position).

    Returns
    -------
    numpy array of shape (n_rows, 1).
    """
    at_10, at_15, at_20 = (X.iloc[:, pos] for pos in range(3))
    mean_rate = (at_10 / 10 + at_15 / 15 + at_20 / 20) / 3
    return mean_rate.to_numpy().reshape(-1, 1)
def compute_tower_score(X):
    """Row-wise sum of all columns of `X`, returned as a single column.

    Parameters
    ----------
    X : DataFrame of numeric/boolean columns.

    Returns
    -------
    numpy array of shape (n_rows, 1) holding each row's total.
    """
    row_totals = X.sum(axis=1)
    return row_totals.to_numpy().reshape(-1, 1)
def compute_diff_drop(X):
    """Difference between the first and second column of `X` (column 0 minus column 1).

    The model pipelines in this notebook refer to this helper as
    `compute_diff_drop`, but no definition under that name is visible in the
    notebook, so a fresh "Restart & Run All" would raise NameError. It is
    therefore defined under that name here.
    NOTE(review): the body is the one this cell previously gave to
    `compute_diff_drop_rate`; confirm it matches the helper used to produce
    the saved outputs.

    Parameters
    ----------
    X : DataFrame with at least two numeric columns (selected by position).

    Returns
    -------
    numpy array of shape (n_rows, 1).
    """
    return (X.iloc[:, 0] - X.iloc[:, 1]).to_numpy().reshape(-1, 1)


# Backward-compatible alias for the name this cell originally defined.
compute_diff_drop_rate = compute_diff_drop
# Wider design matrix for the final models: side, the six first-objective
# flags, and the XP/gold leads at 10/15/20 minutes. Same 70/30 split and seed
# as the baseline cell.
X = data[
    ['side']
    + ['firstbaron', 'firsttothreetowers', 'firstmidtower',
       'firsttower', 'firstdragon', 'firstblood']
    + ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
    + ['golddiffat10', 'golddiffat15', 'golddiffat20']
]
y = data['win']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
)
def final_model_1(X_train, y_train):
    """Fit logistic regression on the engineered feature set and return the fitted pipeline.

    Features, in transformer order: one-hot `side`/`firstbaron`/`firstdragon`/
    `firstblood`; XP and gold per-minute averages; a tower score; and four
    lead differences (gold and XP, 10 vs 15 and 15 vs 20 minutes). Every
    engineered feature is standardised after it is computed.

    The helpers `compute_per_min`, `compute_tower_score` and
    `compute_diff_drop` must already exist in the notebook namespace when
    this function is called.

    Parameters
    ----------
    X_train : DataFrame containing all columns referenced below.
    y_train : target values aligned with `X_train`.

    Returns
    -------
    The fitted scikit-learn `Pipeline`.
    """
    def scaled(func):
        # Apply `func` to the selected columns, then standardise the result.
        return make_pipeline(FunctionTransformer(func=func), StandardScaler())

    preprocessor = make_column_transformer(
        (OneHotEncoder(drop='first'), ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        (scaled(compute_per_min), ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (scaled(compute_per_min), ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (scaled(compute_tower_score), ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (scaled(compute_diff_drop), ['golddiffat10', 'golddiffat15']),
        (scaled(compute_diff_drop), ['golddiffat15', 'golddiffat20']),
        (scaled(compute_diff_drop), ['xpdiffat10', 'xpdiffat15']),
        (scaled(compute_diff_drop), ['xpdiffat15', 'xpdiffat20']),
    )
    # Pipeline.fit returns the pipeline itself.
    return make_pipeline(preprocessor, LogisticRegression()).fit(X_train, y_train)


final1 = final_model_1(X_train, y_train)
final1
Out[554]:
Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side', 'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15',
'x...
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('logisticregression', LogisticRegression())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side', 'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15',
'x...
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('logisticregression', LogisticRegression())])ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
['side', 'firstbaron', 'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
('pipeline-2',
Pipeline(s...
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15', 'xpdiffat20'])])['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x138efb490>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
LogisticRegression()
In [555]:
import matplotlib.pyplot as plt

# Fitted steps of the logistic-regression pipeline.
preprocessor = final1.named_steps['columntransformer']
logreg = final1.named_steps['logisticregression']

# Labels for the one-hot columns, taken from the fitted encoder.
onehot_features = preprocessor.named_transformers_['onehotencoder'].get_feature_names_out(
    ['side', 'firstbaron', 'firstdragon', 'firstblood']
)
# Hand-written labels for the seven engineered features, listed in the same
# order as the transformers passed to make_column_transformer.
custom_features = [
    'xp_per_min', 'gold_per_min', 'tower_score',
    'gold_drop_10_15', 'gold_drop_15_20',
    'xp_drop_10_15', 'xp_drop_15_20',
]
all_features = [*onehot_features, *custom_features]

# Coefficients as a flat 1-D array, one per label above.
coefs = logreg.coef_.flatten()

# Horizontal bar per feature, in label order.
plt.figure(figsize=(10, 6))
plt.barh(all_features, coefs)
plt.title("Feature Importance from Logistic Regression")
plt.xlabel("Coefficient Value (Importance)")
plt.grid(True)
plt.tight_layout()
plt.show()
In [556]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

# Test-set predictions of the final logistic-regression model and their accuracy.
y_pred = final1.predict(X_test)
acc = accuracy_score(y_test, y_pred)

# Confusion matrix with rows/columns in the order of the model's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=final1.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=final1.classes_)
disp.plot()
plt.title('Confusion Matrix')
plt.grid(False)
# Accuracy caption placed below the axes (axes-relative coordinates).
plt.text(0.5, -0.15, f'Accuracy: {acc:.4f}',
         fontsize=12,
         ha='center',
         va='center',
         transform=plt.gca().transAxes)
plt.tight_layout()
# Saved under a model-specific name: this cell previously wrote "base_cm.png",
# which overwrote the baseline model's confusion-matrix image.
plt.savefig("final1_cm.png", dpi=150, bbox_inches='tight')
plt.show()
In [557]:
# Scores from the second column of predict_proba, then the ROC curve and its area.
y_prob = final1.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve (AUC = {roc_auc:.2f})')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Logistic Regression')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()
# Saved under a model-specific name: this cell previously wrote "base_roc.png",
# which overwrote the baseline model's ROC image.
plt.savefig("final1_roc.png", dpi=300, bbox_inches='tight')
plt.show()
In [558]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
def final_model_2(X_train, y_train, k=10):
    """Grid-search a random forest over `max_depth` 1..10 on the engineered features.

    The preprocessing is the same column transformer used for the logistic
    model: one-hot flags, XP/gold per-minute averages, a tower score and four
    lead differences, each engineered feature standardised. Candidates are
    scored by ROC AUC.

    The helpers `compute_per_min`, `compute_tower_score` and
    `compute_diff_drop` must already exist in the notebook namespace when
    this function is called.

    Parameters
    ----------
    X_train : DataFrame containing all columns referenced below.
    y_train : target values aligned with `X_train`.
    k : value passed to GridSearchCV as `cv` (default 10).

    Returns
    -------
    The fitted `GridSearchCV` object.
    """
    def scaled(func):
        # Apply `func` to the selected columns, then standardise the result.
        return make_pipeline(FunctionTransformer(func=func), StandardScaler())

    preprocessor = make_column_transformer(
        (OneHotEncoder(drop='first'), ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        (scaled(compute_per_min), ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (scaled(compute_per_min), ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (scaled(compute_tower_score), ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (scaled(compute_diff_drop), ['golddiffat10', 'golddiffat15']),
        (scaled(compute_diff_drop), ['golddiffat15', 'golddiffat20']),
        (scaled(compute_diff_drop), ['xpdiffat10', 'xpdiffat15']),
        (scaled(compute_diff_drop), ['xpdiffat15', 'xpdiffat20']),
    )
    search = GridSearchCV(
        make_pipeline(preprocessor, RandomForestClassifier(random_state=123)),
        {'randomforestclassifier__max_depth': np.arange(1, 11)},
        cv=k,
        scoring='roc_auc',
    )
    # GridSearchCV.fit returns the search object itself.
    return search.fit(X_train, y_train)


final2 = final_model_2(X_train, y_train, 5)
final2
Out[558]:
GridSearchCV(cv=5,
estimator=Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side',
'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
[...
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('randomforestclassifier',
RandomForestClassifier(random_state=123))]),
param_grid={'randomforestclassifier__max_depth': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])},
scoring='roc_auc')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5,
estimator=Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side',
'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
[...
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('randomforestclassifier',
RandomForestClassifier(random_state=123))]),
param_grid={'randomforestclassifier__max_depth': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])},
scoring='roc_auc')Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side', 'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15',
'x...
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('randomforestclassifier',
RandomForestClassifier(max_depth=6, random_state=123))])ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
['side', 'firstbaron', 'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
('pipeline-2',
Pipeline(s...
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15', 'xpdiffat20'])])['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x138efb490>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
RandomForestClassifier(max_depth=6, random_state=123)
In [525]:
import matplotlib.pyplot as plt

# Best pipeline found by the grid search, and its two fitted steps.
best_pipe = final2.best_estimator_
preprocessor = best_pipe.named_steps['columntransformer']
rf_model = best_pipe.named_steps['randomforestclassifier']

# Labels for the one-hot columns, taken from the fitted encoder.
onehot_features = preprocessor.named_transformers_['onehotencoder'].get_feature_names_out(
    ['side', 'firstbaron', 'firstdragon', 'firstblood']
)
# Hand-written labels for the engineered features (each transformer emits a
# single column), listed in the same order as the transformers.
custom_features = [
    'xp_per_min', 'gold_per_min', 'tower_score',
    'gold_drop_10_15', 'gold_drop_15_20',
    'xp_drop_10_15', 'xp_drop_15_20',
]
all_features = [*onehot_features, *custom_features]

# Importances from the forest, ordered from largest to smallest.
importances = rf_model.feature_importances_
indices = np.argsort(importances)[::-1]
sorted_importances = importances[indices]
sorted_features = [all_features[position] for position in indices]

# Horizontal bars; the y-axis is inverted so the largest importance is on top.
plt.figure(figsize=(10, 6))
plt.barh(sorted_features, sorted_importances)
plt.gca().invert_yaxis()  # Most important at the top
plt.title("Random Forest Feature Importances")
plt.xlabel("Feature Importance")
plt.tight_layout()
plt.grid(True)
plt.show()
In [559]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

# Test-set predictions of the tuned random forest and their accuracy.
y_pred = final2.predict(X_test)
acc = accuracy_score(y_test, y_pred)

# Confusion matrix with rows/columns in the order of the model's own class labels.
cm = confusion_matrix(y_test, y_pred, labels=final2.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=final2.classes_)
disp.plot()
plt.title('Confusion Matrix')
plt.grid(False)
# Accuracy caption placed below the axes (axes-relative coordinates).
plt.text(0.5, -0.15, f'Accuracy: {acc:.4f}',
         fontsize=12,
         ha='center',
         va='center',
         transform=plt.gca().transAxes)
plt.tight_layout()
# Saved under a model-specific name: this cell previously wrote "base_cm.png",
# which overwrote the baseline model's confusion-matrix image.
plt.savefig("final2_cm.png", dpi=150, bbox_inches='tight')
plt.show()
In [560]:
# Scores from the second column of predict_proba, then the ROC curve and its area.
y_prob = final2.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve (AUC = {roc_auc:.2f})')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Random Forest')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()
# Saved under a model-specific name: this cell previously wrote "base_roc.png",
# which overwrote the baseline model's ROC image.
plt.savefig("final2_roc.png", dpi=300, bbox_inches='tight')
plt.show()
In [561]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.model_selection import GridSearchCV
def final_model_3(X_train, y_train):
    """Grid-search a decision tree over one-hot and engineered features.

    Four categorical columns are one-hot encoded (first level dropped). Seven
    further column groups are each passed through a notebook helper
    (compute_per_min, compute_tower_score or compute_diff_drop) and then
    standardized. The resulting matrix feeds a DecisionTreeClassifier whose
    max_depth is tuned over 1..10 with 5-fold cross-validation scored by
    ROC AUC.

    Parameters
    ----------
    X_train : training features containing the columns named below.
    y_train : training labels.

    Returns
    -------
    The fitted GridSearchCV object.
    """
    def _derive_then_scale(func):
        # Every engineered feature shares this shape: derive, then standardize.
        return make_pipeline(FunctionTransformer(func=func), StandardScaler())

    # Order matters: the ColumnTransformer concatenates outputs in this order.
    column_steps = [
        (OneHotEncoder(drop='first'),
         ['side', 'firstbaron', 'firstdragon', 'firstblood']),
        (_derive_then_scale(compute_per_min),
         ['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
        (_derive_then_scale(compute_per_min),
         ['golddiffat10', 'golddiffat15', 'golddiffat20']),
        (_derive_then_scale(compute_tower_score),
         ['firsttower', 'firstmidtower', 'firsttothreetowers']),
        (_derive_then_scale(compute_diff_drop),
         ['golddiffat10', 'golddiffat15']),
        (_derive_then_scale(compute_diff_drop),
         ['golddiffat15', 'golddiffat20']),
        (_derive_then_scale(compute_diff_drop),
         ['xpdiffat10', 'xpdiffat15']),
        (_derive_then_scale(compute_diff_drop),
         ['xpdiffat15', 'xpdiffat20']),
    ]
    features = make_column_transformer(*column_steps)

    model = make_pipeline(features, DecisionTreeClassifier(random_state=123))

    # Only tree depth is tuned.
    depth_grid = {'decisiontreeclassifier__max_depth': np.arange(1, 11)}
    search = GridSearchCV(model, depth_grid, cv=5, scoring='roc_auc')
    search.fit(X_train, y_train)
    return search
# Fit the decision-tree grid search on X_train / y_train; the trailing bare
# name makes the notebook display the fitted search object.
final3 = final_model_3(X_train, y_train)
final3
Out[561]:
GridSearchCV(cv=5,
estimator=Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side',
'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
[...
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('decisiontreeclassifier',
DecisionTreeClassifier(random_state=123))]),
param_grid={'decisiontreeclassifier__max_depth': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])},
scoring='roc_auc')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=5,
estimator=Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side',
'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
[...
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('decisiontreeclassifier',
DecisionTreeClassifier(random_state=123))]),
param_grid={'decisiontreeclassifier__max_depth': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])},
scoring='roc_auc')Pipeline(steps=[('columntransformer',
ColumnTransformer(transformers=[('onehotencoder',
OneHotEncoder(drop='first'),
['side', 'firstbaron',
'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15',
'x...
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15',
'xpdiffat20'])])),
('decisiontreeclassifier',
DecisionTreeClassifier(max_depth=5, random_state=123))])ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(drop='first'),
['side', 'firstbaron', 'firstdragon',
'firstblood']),
('pipeline-1',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']),
('pipeline-2',
Pipeline(s...
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat10', 'xpdiffat15']),
('pipeline-7',
Pipeline(steps=[('functiontransformer',
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)),
('standardscaler',
StandardScaler())]),
['xpdiffat15', 'xpdiffat20'])])['side', 'firstbaron', 'firstdragon', 'firstblood']
OneHotEncoder(drop='first')
['xpdiffat10', 'xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['golddiffat10', 'golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_per_min at 0x138ef8d30>)
StandardScaler()
['firsttower', 'firstmidtower', 'firsttothreetowers']
FunctionTransformer(func=<function compute_tower_score at 0x138efb490>)
StandardScaler()
['golddiffat10', 'golddiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['golddiffat15', 'golddiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat10', 'xpdiffat15']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
['xpdiffat15', 'xpdiffat20']
FunctionTransformer(func=<function compute_diff_drop at 0x3065ae830>)
StandardScaler()
DecisionTreeClassifier(max_depth=5, random_state=123)
In [533]:
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): this cell's saved execution count (533) is lower than that of
# the cell that fits final3 (561); re-run it after refitting so the plot
# reflects the current model.
# 1. Get the best pipeline from GridSearchCV
best_pipe = final3.best_estimator_
preprocessor = best_pipe.named_steps['columntransformer']
tree_model = best_pipe.named_steps['decisiontreeclassifier']
# 2. Build feature names in the ColumnTransformer's output order:
#    one-hot columns first, then the engineered pipelines in the order given.
onehot_features = preprocessor.named_transformers_['onehotencoder'].get_feature_names_out(
    ['side', 'firstbaron', 'firstdragon', 'firstblood']
)
# Hand-written labels: assumes each engineered pipeline emits exactly one
# column (checked against the fitted tree below).
custom_features = [
    'xp_per_min', 'gold_per_min', 'tower_score',
    'gold_drop_10_15', 'gold_drop_15_20',
    'xp_drop_10_15', 'xp_drop_15_20'
]
all_features = list(onehot_features) + custom_features
# 3. Get feature importances
importances = tree_model.feature_importances_
# Labels are matched to importances purely by position, so a count mismatch
# would silently mislabel bars. Fail loudly instead.
if len(all_features) != len(importances):
    raise ValueError(
        f"Feature name count ({len(all_features)}) does not match the number "
        f"of importances ({len(importances)}); update custom_features."
    )
# 4. Sort importances (descending)
indices = np.argsort(importances)[::-1]
sorted_features = [all_features[i] for i in indices]
sorted_importances = importances[indices]
# 5. Plot
plt.figure(figsize=(10, 6))
plt.barh(sorted_features, sorted_importances)
plt.gca().invert_yaxis()  # Most important at the top
plt.xlabel("Feature Importance")
plt.title("Decision Tree Feature Importances")
plt.grid(True)
plt.tight_layout()
plt.show()
In [562]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
# Confusion matrix for the tuned Decision Tree search (final3) on the test split.
y_pred = final3.predict(X_test)
# Pass labels explicitly so matrix rows/columns line up with display_labels.
cm = confusion_matrix(y_test, y_pred, labels=final3.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=final3.classes_)
acc = accuracy_score(y_test, y_pred)
disp.plot()
plt.title('Confusion Matrix')
plt.grid(False)
# Annotate the accuracy just below the axes (coordinates are axes fractions).
plt.text(0.5, -0.15, f'Accuracy: {acc:.4f}',
         fontsize=12,
         ha='center',
         va='center',
         transform=plt.gca().transAxes)
plt.tight_layout()
# Save under a model-specific name so this figure neither overwrites nor is
# overwritten by another model's confusion matrix.
plt.savefig("final3_cm.png", dpi=150, bbox_inches='tight')
plt.show()
In [563]:
# ROC curve for the tuned Decision Tree search (final3) on the test split.
# Column 1 of predict_proba is the probability of the second entry in classes_.
y_prob = final3.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve (AUC = {roc_auc:.2f})')
# Diagonal reference line for comparison against the model's curve.
plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# final3 wraps a DecisionTreeClassifier, so the title names that model.
plt.title('ROC Curve for Decision Tree')
plt.legend(loc='lower right')
plt.grid(True)
plt.tight_layout()
# Save under a model-specific name so this figure neither overwrites nor is
# overwritten by another model's ROC plot.
plt.savefig("final3_roc.png", dpi=300, bbox_inches='tight')
plt.show()
In [564]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
# Fitted estimators to compare, keyed by the label shown in the legend.
models = {
    'Logistic Regression (base)': base,
    'Logistic Regression (final1)': final1,
    'Random Forest (final2)': final2,
    'Decision Tree (final3)': final3,
}

fig, ax = plt.subplots(figsize=(10, 7))

# One ROC curve per model, all scored on the same test split.
for legend_name, estimator in models.items():
    scores = estimator.predict_proba(X_test)[:, 1]
    false_pos, true_pos, _ = roc_curve(y_test, scores)
    area = auc(false_pos, true_pos)
    ax.plot(false_pos, true_pos, lw=2, label=f'{legend_name} (AUC = {area:.2f})')

# Diagonal reference line.
ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')

# Axis labels, title, legend and grid.
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves for All Models')
ax.legend(loc='lower right')
ax.grid(True)
fig.tight_layout()

# Write the combined figure to disk, then display it.
fig.savefig("all_models_roc.png", dpi=300, bbox_inches='tight')
plt.show()
In [ ]: